package util;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;


import java.io.*;


import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;

import java.io.IOException;

/**
 * Utility for extracting the plain-text body of a document with Apache Tika.
 * created by A on 2022/4/7
 */
public class TikaUtil {
    /**
     * Value returned in place of the document text when Tika fails to parse the input.
     * Kept byte-identical to the original literal because callers may compare against it.
     */
    private static final String PARSE_FAILURE_MESSAGE = "文件未去除水印";

    /**
     * Parses the given stream with Tika's auto-detecting parser and returns the
     * text collected by the body content handler.
     *
     * <p>Parse failures ({@link TikaException} or {@link SAXException}) are not
     * propagated; the fixed message {@code "文件未去除水印"} is returned instead,
     * so callers that need to tell a failure apart from real content must compare
     * against that value.
     *
     * @param is the document stream to parse; must not be {@code null}. This method
     *           does not close it (per Tika's {@code Parser.parse} contract the stream
     *           is consumed but not closed), so the caller should close it.
     * @return the extracted document text, or the fixed failure message described above
     * @throws IOException if the stream cannot be read
     * @throws NullPointerException if {@code is} is {@code null}
     */
    public static String tikaParse(InputStream is) throws IOException {
        // Fail fast with a clear message instead of an NPE raised deep inside Tika.
        if (is == null) {
            throw new NullPointerException("is must not be null");
        }
        try {
            // Tika's default handler stops after 100000 characters; -1 removes that limit.
            BodyContentHandler textHandler = new BodyContentHandler(-1);
            // Receives document metadata such as author and title during parsing.
            Metadata metadata = new Metadata();
            // Detects the document's MIME type itself when parse is called.
            AutoDetectParser parser = new AutoDetectParser();
            parser.parse(is, textHandler, metadata);
            return textHandler.toString();
        } catch (TikaException | SAXException ex) {
            // Deliberate best-effort: both failure kinds map to the same sentinel text.
            return PARSE_FAILURE_MESSAGE;
        }
    }

}
